# Run parameters as flat key = value pairs; every value here is an integer literal.
# NOTE(review): there is no [section] header, so the keys are global. The parser
# that reads this file is not visible here; confirm it accepts global keys and
# '#' full-line comments before relying on this commentary.

# Presumably the sequence length per sample (likely in tokens) - TODO confirm units.
seqlength = 4096
# Presumably the per-step (micro) batch size - confirm whether per device or global.
batchsize = 1
# Presumably the number of gradient-accumulation steps per optimizer update - confirm.
accumulate_steps = 32
# NOTE(review): -1 looks like a sentinel (e.g. "unset" or "auto-detect") rather
# than a real FLOPs figure; confirm its meaning and expected units with the consumer.
theoryflops = -1
# Presumably the total number of training steps to run - confirm.
train_steps = 1000
# Presumably the tensor-parallel degree; 1 would mean no tensor parallelism - confirm.
tensor_parallel = 1
# Presumably the pipeline-parallel degree; 1 would mean no pipeline parallelism - confirm.
pipeline_parallel = 1
